home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Celestin Apprentice 5
/
Apprentice-Release5.iso
/
Demos
/
Evatac Software
/
Preditor 3.0
/
Tools
/
Language Module Builder
/
Sources
/
HtmlParse.c
< prev
next >
Wrap
Text File
|
1996-02-04
|
15KB
|
758 lines
/************************************************************
HtmlParse.c
C Source to Preditor 3
Language Module Code for the "HTML" language
© Copyright Evatac Software 1988-1996
All rights reserved
************************************************************/
#include "HtmlParse.h"
#include <SetupA4.h>
#include <MixedMode.h>
#include <Ctype.h>
/*
* A4 register handling.  When THINKC is not defined the declarations come from
* <A4Stuff.h>; under THINKC the two calls used by main() are mapped onto the
* SetupA4.h routines instead.
*/
#ifndef THINKC
#include <A4Stuff.h>
#else
/*
* Expands to two statements: "x = SetCurrentA4();" becomes
* "x = 0; RememberA4();", so it may only be used as a complete statement.
*/
#define SetCurrentA4() 0; RememberA4()
/* The argument is ignored; SetUpA4() is called instead. */
#define SetA4(x) SetUpA4()
#endif
#ifdef powerc
ProcInfoType __procinfo = LanguageUPPInfo;
#endif
// Remove the comment on the following line to include hrefs in reference popup
// NOTE(review): INCLUDE_HREFS is not tested anywhere in the visible source.
// #define INCLUDE_HREFS 1
/* Parser state handed to every language* helper call. */
static languageGlobals globals;
/* Host callback block; set from the extCallbacks argument on entry to main(). */
static ExternalCallbackBlock *callbacks;
/*
* inMarkup   - true between a '<' that opens a tag and the following '>'
* inNegation - true when that tag began with "</"
* inHeader   - toggled by main() when a two-character h<digit> tag is seen
* Only inHeader has an explicit initializer; the other two rely on the
* zero-initialization of static storage.
*/
static Boolean inMarkup, inNegation, inHeader = false;
/*
* * * * HTML LANGUAGE INDENT HANDLER * * * * * *
*/
/*
 * _languageConvertToTabs
 *
 * Fill `text` with the white space that produces `length` columns of
 * indentation: one tab character for every full `hardTab` columns followed
 * by one space for each remaining column.
 *
 *   text     destination buffer; it is NOT NUL-terminated.  The caller must
 *            provide room for length / hardTab + length % hardTab characters
 *            (at most `length` characters).
 *   length   indentation width in columns; values <= 0 write nothing.
 *   hardTab  columns per tab; values <= 0 cannot be divided by, so the
 *            indentation is then written with spaces only.
 *
 * Returns the number of characters written, never negative.
 */
static long _languageConvertToTabs(
    Char *text,
    long length,
    long hardTab
)
{
    long tabs, spaces, written;

    if (length <= 0)
        return(0);

    if (hardTab > 0) {
        tabs = length / hardTab;
        spaces = length % hardTab;
    }
    else {
        /* No usable tab width: avoid the division and indent with spaces */
        tabs = 0;
        spaces = length;
    }
    written = tabs + spaces;

    while (tabs-- > 0)
        *(text++) = '\t';
    while (spaces-- > 0)
        *(text++) = ' ';

    return(written);
}
/*
 * _languageHandleIndent
 *
 * Re-indent every line touched by the current selection.  For each line
 * (line 1 is skipped) the leading white space is selected and replaced by
 * white space equal to the `leading` value that extGetLeading reports for
 * the nearest preceding non-blank line.  If no such line exists the line's
 * leading white space is left selected and nothing is inserted.
 *
 * When at least one line was re-indented, the selection is collapsed to an
 * insertion point just after the new indentation of the first such line.
 *
 *   extData  unused.
 *
 * The replacement text is built in a fixed 256-character buffer.  A line
 * whose indentation would not fit is left unchanged rather than overrunning
 * the buffer, and a tab width below 1 falls back to spaces-only indentation
 * so that no division by zero can occur.
 */
static void _languageHandleIndent(
    void *extData
)
{
    long anchor, end, pos, length;
    long lineStart;
    long i, x, newPos = -1;
    long lineNumber, endLineNumber, leading;
    long tabWidth, needed;
    short spacesPerTab, hardTab;
    Char text[256];
    const long capacity = (long) (sizeof(text) / sizeof(text[0]));

    extGetSelection(callbacks, &anchor, &end);
    if (anchor > end) {
        pos = anchor; anchor = end; end = pos; /* Swap */
    }
    lineNumber = extLineFromPosition(callbacks, anchor);
    endLineNumber = extLineFromPosition(callbacks, end);

    /*
     * Indent each line in the selection
     */
    for (; lineNumber <= endLineNumber; lineNumber++) {
        /* The first line has no previous line to take its indentation from */
        if (lineNumber <= 1)
            continue;

        /* Only `length` (size of the existing leading white space) is used */
        (void) extGetLeading(callbacks, lineNumber, &length,
                             &spacesPerTab, &hardTab);
        lineStart = extLineToPosition(callbacks, lineNumber);

        /*
         * Select the leading spaces/tabs so the insert below replaces them
         */
        extSetSelection(callbacks, lineStart, lineStart + length);

        /*
         * Scan back previous lines for a line that we can relate to
         */
        for (x = 1; lineNumber - x >= 1; x++) {
            leading = extGetLeading(callbacks, lineNumber - x, &length,
                                    &spacesPerTab, &hardTab);
            end = extLineEnd(callbacks, lineNumber - x);
            pos = extLineToPosition(callbacks, lineNumber - x);

            /* Skip blank lines: the whole line is leading white space */
            if (length == (end - pos))
                continue;

            /*
             * Indentation smarts are still under development: the scan of
             * the reference line is opened and closed without reading it.
             */
            i = pos + length;
            extScanContents(callbacks, i);
            extDoneScan(callbacks);

            /*
             * Validate before converting: never divide by a non-positive
             * tab width and never write past the end of text[].
             */
            if (leading < 0)
                leading = 0;
            tabWidth = hardTab;
            if (tabWidth < 1)
                tabWidth = leading + 1;     /* yields spaces only */
            needed = leading / tabWidth + leading % tabWidth;
            if (needed > capacity)
                break;                      /* too deep for the buffer */

            /*
             * Indent the line
             */
            i = _languageConvertToTabs(text, leading, tabWidth);
            extInsert(callbacks, text, i);
            if (newPos == -1)
                newPos = lineStart + i;
            break;
        }
    }

    if (newPos >= 0)
        extSetSelection(callbacks, newPos, newPos);
}
/*
* * * * HTML LANGUAGE PARSER * * * * * *
*/
/*
 * _languageBuildString
 *
 * Build a quoted attribute value, "foo" or 'foo'.
 *
 *   token  receives the text (both quotes included) and the type:
 *          kSymbolStringLiteral for a double-quoted value,
 *          kSymbolCharConstant for a single-quoted one.
 *   c      the opening quote character, already read by the caller.
 *
 * Input is consumed up to and including the matching closing quote, or to
 * end of input if the value is unterminated.  At most kTokenStringSize
 * characters are stored; anything beyond that is read and dropped.
 *
 * HTML attribute values have no backslash escape mechanism, so a backslash
 * is ordinary text and the first matching quote always ends the value.
 */
static void _languageBuildString(
    languageToken *token,
    int c
)
{
    Int32 index = 1, size = kTokenStringSize;
    int quote = c;

    token->string[1] = c;
    token->type = (c == '\"' ? kSymbolStringLiteral : kSymbolCharConstant);

    while ((c = languageGetChar(&globals, callbacks)) != -1) {
        if (index < size)
            token->string[++index] = c;
        if (c == quote)
            break;
    }

    token->string[0] = index; /* So string can be used as C or Pascal string */
    token->string[++index] = 0;
}
/*
 * _languageBuildWhiteSpace
 *
 * Mark `token` as white space and swallow the run of white-space characters
 * that follows in the input.
 *
 *   token  its type is set to kSymbolWhiteSpace; nothing else is changed.
 *   c      ignored on entry; used only as scratch for the characters read.
 *
 * Reading stops at end of input, or at the first character that is not one
 * of space, \t, \v, \n, \r, \f or \b; that character is pushed back so the
 * next read sees it again.
 */
static void _languageBuildWhiteSpace(
    languageToken *token,
    int c
)
{
    token->type = kSymbolWhiteSpace;

    for (;;) {
        c = languageGetChar(&globals, callbacks);
        switch (c) {
        case ' ': case '\t': case '\v': case '\n':
        case '\r': case '\f': case '\b':
            continue;           /* still white space: keep consuming */
        case -1:
            return;             /* end of input */
        default:
            languageUngetChar(&globals, c);
            return;
        }
    }
}
/*
 * _languageBuildContent
 *
 * Collect document text that lies outside markup into `token`.
 *
 *   token  receives type kSymbolContent, the collected text, and
 *          endLocation (taken from globals.position when collection stops).
 *   c      first character of the content, already read by the caller.
 *
 * Characters are read until end of input or until a '<' is seen; the '<' is
 * pushed back for the next token.  At most kTokenStringSize characters are
 * stored; further characters are consumed but not kept.
 */
static void _languageBuildContent(
    languageToken *token,
    int c
)
{
    const Int32 capacity = kTokenStringSize;
    Int32 stored = 1;
    int next;

    token->type = kSymbolContent;
    token->string[1] = c;

    for (;;) {
        next = languageGetChar(&globals, callbacks);
        if (next == -1)
            break;
        if (next == '<') {
            languageUngetChar(&globals, next);
            break;
        }
        if (stored < capacity) {
            stored++;
            token->string[stored] = next;
        }
    }

    token->string[0] = stored; /* So string can be used as C or Pascal string */
    token->string[stored + 1] = 0;
    token->endLocation = globals.position;
}
/*
 * _languageBuildComment
 *
 * Mark `token` as a comment and skip over the comment body.
 *
 *   token  its type is set to kSymbolComment.
 *   c      ignored on entry; used only as scratch for the characters read.
 *
 * globals.startLastComment is set to the current globals.position.  One
 * character is then read; unless it is '-', nothing more is consumed.
 * Otherwise input is consumed up to and including the first '/' that
 * immediately follows a '-', or to end of input.
 *
 * NOTE(review): together with the caller's test for '<' followed by '-',
 * this recognizes "<- ... -/" rather than the HTML form "<!-- ... -->".
 * Confirm whether that is intended.
 */
static void _languageBuildComment(
    languageToken *token,
    int c
)
{
    Boolean prevDash = false;

    token->type = kSymbolComment;
    globals.startLastComment = globals.position;

    c = languageGetChar(&globals, callbacks);
    if (c != '-')
        return;

    for (;;) {
        c = languageGetChar(&globals, callbacks);
        if (c == -1)
            return;                 /* unterminated comment */
        if (c == '/' && prevDash)
            return;                 /* found the closing "-/" */
        prevDash = (c == '-') ? true : false;
    }
}
/*
 * _languageBuildNumber
 *
 * Consume a numeric constant written in C syntax and classify it.
 *
 *   token  its type is set to kSymbolIntConstant, or to
 *          kSymbolFloatConstant when a fraction ('.') or exponent ('e'/'E')
 *          is present.  The digits themselves are not stored.
 *   c      first digit of the number, already read by the caller.
 *
 * A leading '0' selects hexadecimal (when followed by 'x'/'X') or octal
 * digits; any other first digit selects decimal with optional fraction and
 * exponent.  Trailing l/L/u/U/f/F/h/H suffix letters are consumed as well.
 * The first character that does not belong to the number is pushed back,
 * unless it is end of input.
 */
static void _languageBuildNumber(
    languageToken *token,
    int c
)
{
    token->type = kSymbolIntConstant;

    if (c != '0') {
        /* decimal integer part */
        do {
            c = languageGetChar(&globals, callbacks);
        } while (c >= '0' && c <= '9');

        /* optional fraction */
        if (c == '.') {
            token->type = kSymbolFloatConstant;
            do {
                c = languageGetChar(&globals, callbacks);
            } while (c >= '0' && c <= '9');
        }

        /* optional exponent with optional sign */
        if (c == 'e' || c == 'E') {
            token->type = kSymbolFloatConstant;
            c = languageGetChar(&globals, callbacks);
            if (c == '+' || c == '-')
                c = languageGetChar(&globals, callbacks);
            while (c >= '0' && c <= '9')
                c = languageGetChar(&globals, callbacks);
        }
    }
    else {
        c = languageGetChar(&globals, callbacks);
        if (c == 'x' || c == 'X') {
            /* hexadecimal digits */
            do {
                c = languageGetChar(&globals, callbacks);
            } while ((c >= '0' && c <= '9') ||
                     (c >= 'a' && c <= 'f') ||
                     (c >= 'A' && c <= 'F'));
        }
        else {
            /* octal digits; the character just read is examined first */
            while (c >= '0' && c <= '7')
                c = languageGetChar(&globals, callbacks);
        }
    }

    /* type suffix letters */
    for (;;) {
        switch (c) {
        case 'l': case 'L': case 'u': case 'U':
        case 'f': case 'F': case 'h': case 'H':
            c = languageGetChar(&globals, callbacks);
            continue;
        default:
            break;
        }
        break;
    }

    if (c != -1)
        languageUngetChar(&globals, c);
}
/*
 * _languageBuildWord
 *
 * Build an identifier (letters, digits and '_') and, inside markup,
 * classify it as a reserved or custom word.
 *
 *   token  receives the identifier text and a type of kSymbolIdentifier,
 *          kSymbolReservedWord or kSymbolCustomWord.
 *   c      first character of the word, already read by the caller.
 *
 * At most kTokenStringSize characters are stored.  When the word is longer,
 * the last stored character is replaced by an ellipsis and the remainder is
 * consumed.  The character that ends the word is pushed back.
 *
 * Outside markup (inMarkup false) the word is left as a plain identifier.
 * Inside markup the lower-cased name is looked up:
 *   - with no reserved-word table loaded, only "href" and the heading tags
 *     h1 .. h6 are reserved;
 *   - otherwise the reserved-word table is consulted first, then the custom
 *     table.
 */
static void _languageBuildWord(
    languageToken *token,
    int c
)
{
    Int32 index = 1, size = kTokenStringSize;
    Int32 i;
    Char *name;
    Char lowerStr[kTokenStringSize + 2];

    token->type = kSymbolIdentifier;
    token->string[1] = c;
    while ((c = languageGetChar(&globals, callbacks)) != -1) {
        if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') ||
            (c >= '0' && c <= '9') || c == '_') {
            if (index < size)
                token->string[++index] = c;
            else
                /*
                 * Ellipsis in the Mac OS Roman character set (0xC9),
                 * written as an escape so the value does not depend on the
                 * encoding this source file is stored in.
                 */
                token->string[index] = '\311';
        }
        else {
            languageUngetChar(&globals, c);
            break;
        }
    }
    token->string[0] = index; /* So string can be used as C or Pascal string */
    token->string[index + 1] = 0;

    /* Words outside a tag are never keywords; skip the lookup work */
    if (!inMarkup)
        return;

    /*
     * Lower-case copy for lookup.  Slot 0 mirrors the length byte unchanged;
     * the name itself starts at slot 1.  The cast keeps tolower() defined
     * for characters with the high bit set.
     */
    lowerStr[0] = token->string[0];
    for (i = 1; i <= index; i++)
        lowerStr[i] = tolower((unsigned char) token->string[i]);
    lowerStr[index + 1] = 0;
    name = lowerStr + 1;

    /*
     * Since hashing into a large reserved word table takes time, the reserved
     * word table is not loaded for "function" scanning. We do our own
     * limited keyword check
     */
    if (!languageHasTable(&globals)) {
        if (languageCStringCompare(name, (Char *) "href") == 0)
            token->type = kSymbolReservedWord;
        else if (name[0] == 'h' &&
                 name[1] >= '1' && name[1] <= '6' &&
                 name[2] == 0)              /* heading tags h1 .. h6 only */
            token->type = kSymbolReservedWord;
        return;
    }

    if (languageTableLookup((&globals), name))
        token->type = kSymbolReservedWord;
    else if (languageCustomTableLookup((&globals), name))
        token->type = kSymbolCustomWord;
}
/*
 * _languageGetNextToken
 *
 * Read the next token from the input into globals.token.
 *
 * Returns a pointer to globals.token, or nil at end of input.  The same
 * token structure is reused on every call.  On return the token's
 * startLocation and endLocation are taken from globals.position before and
 * after the token was read, and majorType is -1.
 *
 * Outside markup, everything up to the next '<' becomes one content token.
 * Otherwise the token type starts out as the first character itself and is
 * refined by the builder routine for strings, white space, comments,
 * numbers and words.  '<' (not followed by '-') enters markup and records
 * whether the tag starts with "</"; '>' leaves markup and clears
 * globals.startLastComment.  Any other character is returned as a
 * single-character token.
 *
 * NOTE(review): a comment is recognized when '<' is followed by '-', not by
 * "!--" as in HTML.  Confirm whether that is intended.
 */
static languageToken *_languageGetNextToken(void)
{
    languageToken *tok = &globals.token;
    int ch, ahead;

    tok->startLocation = globals.position;
    tok->majorType = -1;

    ch = languageGetChar(&globals, callbacks);
    if (ch == -1)
        return(nil);

    tok->type = ch;
    ahead = languagePeekChar(&globals, callbacks);

    /* Plain document text between tags */
    if (!inMarkup && ch != '<') {
        _languageBuildContent(tok, ch);
        return(tok);
    }

    if (ch == '\"' || ch == '\'') {
        /* "strings" and 'character constants' */
        if (inMarkup)
            _languageBuildString(tok, ch);
    }
    else if (ch == ' ' || ch == '\t' || ch == '\v' || ch == '\n' ||
             ch == '\r' || ch == '\f' || ch == '\b') {
        /* white space */
        _languageBuildWhiteSpace(tok, ch);
    }
    else if (ch == '<') {
        /* Markup language */
        if (ahead == '-') {
            _languageBuildComment(tok, ch);
        }
        else {
            inMarkup = true;
            inNegation = (ahead == '/');
        }
    }
    else if (ch == '>') {
        inMarkup = false;
        inNegation = false;
        globals.startLastComment = -1;
    }
    else if (ch >= '0' && ch <= '9') {
        _languageBuildNumber(tok, ch);
    }
    else if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') ||
             ch == '_') {
        _languageBuildWord(tok, ch);
    }
    /* Anything else is a one-character token; let the parser decide. */

    tok->endLocation = globals.position;
    return(tok);
}
/*
* Prototype for languageMain.
* NOTE(review): the entry point defined below is named main and takes the
* same parameter list; no definition of languageMain appears in the visible
* source.  Confirm whether this declaration is still needed.
*/
void languageMain(
ExternalCallbackBlock *extCallbacks,
WindowRef window,
long options,
void *extData
);
/*
 * main
 *
 * This is the main entrypoint to the CODE module of a language module.
 *
 *   extCallbacks  callback block supplied by the host; kept in `callbacks`
 *   window        unused
 *   options       operation to perform (see below)
 *   extData       passed through to the template and indent handlers
 *
 * Operations:
 *
 *   kLanguageTemplate   handed to languageDefaultHandler
 *   kLanguageIndent     indent the selected lines
 *   any value up to and including kLanguageIncludes
 *                       tokenize the file and report tokens through
 *                       extTokenReturn:
 *                         - with kLanguageParse, every token that was given
 *                           a major type (kKeyword, kCustomKeyword,
 *                           kComment, kHeader, kContent) and, for
 *                           href = "...", the string as kReference;
 *                         - otherwise only non-blank content found inside a
 *                           heading tag.
 *
 * All per-parse state (inMarkup, inNegation, inHeader) is reset on entry so
 * that an unterminated tag or heading in one invocation cannot leak into
 * the next.
 */
void main(
    ExternalCallbackBlock *extCallbacks,
    WindowRef window,
    long options,
    void *extData
)
{
    languageToken *token;
    Int16 type;
    Char *name;
    Boolean isHref;
    long saved_a4;

    /* Must stay a plain statement: under THINKC the macro expands to two */
    saved_a4 = SetCurrentA4();

    inMarkup = false;
    inNegation = false;
    inHeader = false;
    globals.startLastComment = -1;
    languageInit(&globals, extCallbacks, options);
    callbacks = extCallbacks;

    if (options == kLanguageTemplate) {
        languageDefaultHandler(&globals, callbacks, options, extData);
    }
    else if (options == kLanguageIndent) {
        _languageHandleIndent(extData);
    }
    else if (options <= kLanguageIncludes) {
        /*
         * Now parse the file, returning a series of valid return token types:
         *
         *      kReference
         *      kKeyword
         *      kComment
         *      kCustomKeyword
         *      kHeader
         */
        while ((token = _languageGetNextToken()) != nil) {
            type = token->type;

            if (type == kSymbolReservedWord) {
                token->majorType = kKeyword;
                /* Pascal-style: name[0] is the length, text starts at [1] */
                name = token->string;

                /* A four-letter reserved word starting with "hr", any case */
                isHref = name[0] == 4 &&
                         (name[1] == 'h' || name[1] == 'H') &&
                         (name[2] == 'r' || name[2] == 'R');

                if (isHref && options == kLanguageParse && inMarkup) {
                    /* Look ahead for  = "string"  and report the string */
                    _languageBuildWhiteSpace(token, 0);
                    if ((token = _languageGetNextToken()) != nil &&
                        token->type == kSymbolEqual) {
                        _languageBuildWhiteSpace(token, 0);
                        if ((token = _languageGetNextToken()) != nil &&
                            token->type == kSymbolStringLiteral) {
                            token->majorType = kReference;
                            token->commentLocation = globals.startLastComment;
                            extTokenReturn(callbacks, token);
                            continue;
                        }
                    }
                    /* Input ended during the look-ahead: nothing to report */
                    if (token == nil)
                        break;
                }

                if (options == kLanguageParse)
                    extTokenReturn(callbacks, token);

                /* <h1>, <H2>, ... open a heading; </h1> closes it */
                if (inMarkup &&
                    (name[1] == 'h' || name[1] == 'H') &&
                    name[2] >= '0' && name[2] <= '9' && name[0] == 2 &&
                    options != kLanguageIncludes) {
                    inHeader = !inNegation;
                }
                continue;
            }
            else if (type == kSymbolCustomWord) {
                token->majorType = kCustomKeyword;
            }
            else if (type == kSymbolComment) {
                token->majorType = kComment;
            }
            else if (type == kSymbolContent) {
                token->majorType = (inHeader ? kHeader : kContent);
                if (options != kLanguageParse && inHeader) {
                    int len = token->string[0];
                    Char *text = token->string + 1;

                    /* Make sure there is something in the header */
                    while (len-- > 0) {
                        Char ch = *(text++);
                        if (ch != ' ' && ch != '\t' && ch != '\v' &&
                            ch != '\n' && ch != '\r' && ch != '\f' &&
                            ch != '\b') {
                            token->commentLocation = globals.startLastComment;
                            extTokenReturn(callbacks, token);
                            break;
                        }
                    }
                    continue;
                }
            }

            /*
             * Only return a token if it's a interesting token, and
             * if we are doing a full parse
             */
            if (token->majorType >= 0 && options == kLanguageParse)
                extTokenReturn(callbacks, token);
        }
    }

    /*
     * Clean up after ourselves
     */
    languageDone(&globals, callbacks);
    SetA4(saved_a4);
}